# import libraries
import altair as alt
import pandas as pd
# Load the dataset into a Pandas DataFrame
data = pd.read_excel('SFTT.xlsx')

# Remove surrounding white space from column names
data = data.rename(columns=lambda x: x.strip())

# Keep only the rows whose 'Tree Present' flag is True
data = data[data['Tree Present'].eq(True)]

# Normalise postal codes to text: cast to string, left-pad with zeros to
# seven characters, then drop the final two characters.
# NOTE(review): presumably this turns float-like values such as "2118.0"
# into "02118", and missing values into the '000<N' entry excluded below;
# confirm against the spreadsheet.
data = data.astype({'Postal Code': 'string'})
data['Postal Code'] = data['Postal Code'].apply(lambda x: '{0:0>7}'.format(x))
data['Postal Code'] = data['Postal Code'].str[:-2]

# Drop rows with missing values in the 'Species' column
if 'Species' in data.columns:
    data = data.dropna(subset=['Species'])

# The colon in the original column name clashes with Altair's
# "field:type" shorthand, so give the column a colon-free name.
data = data.rename(columns={'Tree: Tree Condition': "Tree Condition"})

# Clean our data: take out postal codes that are too far from the
# Mass Ave Corridor (plus malformed codes produced by the padding above).
EXCLUDED_POSTAL_CODES = [
    '02136', '00021', '000<N', '02129', '02109', '02113',
    '02122', '02128', '02201', '02215', '02445',
]
data = data[~data['Postal Code'].isin(EXCLUDED_POSTAL_CODES)]

# Distinct postal codes that remain; used to populate the dropdown filter
postal_codes = data['Postal Code'].unique()
# Dropdown widget listing every postal code still present in the data
dropdown = alt.binding_select(options=postal_codes)

# Point selection on the "Postal Code" field, driven by the dropdown.
# selection_point replaces the deprecated selection_single.
selection = alt.selection_point(
    fields=["Postal Code"],
    bind=dropdown,
    name="Postal Codes",
)
# Interval (drag) selection made on the map; the bar charts filter on it
brush = alt.selection_interval()

# Map of individual trees, positioned by the "Point X" / "Point Y" columns,
# coloured by postal code and sized by trunk diameter.
trees = (
    alt.Chart(data)
    .mark_circle(size=10)
    .encode(
        longitude="Point X:Q",
        latitude="Point Y:Q",
        color="Postal Code",
        size=alt.Size('Diameter:Q', scale=alt.Scale(range=[10, 150])),
        tooltip=["Genus", "Common Name", "Postal Code", "Species"],
    )
    # Register both parameters with add_params; add_selection is deprecated.
    .add_params(brush, selection)
    # Only show trees in the postal code chosen in the dropdown
    .transform_filter(selection)
    .properties(title='Trees Across Boston', width=300, height=300)
)
# Bar chart of tree counts per postal code, restricted to the trees
# inside the interval selection (brush) drawn on the map.
# TODO: an earlier draft tried a conditional colour (steelblue vs.
# lightgray) via alt.condition; it was never finished.
histogram = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        x=alt.X('count():Q', title='Tree Count'),
        y=alt.Y('Postal Code:N', title='Zip Code'),
        color='Postal Code:N',
    )
    .transform_filter(brush)
    .properties(title='Number of Trees by Postal Code', height=300)
)
# Bar chart of tree counts per genus, coloured by postal code and
# restricted to the trees inside the interval selection (brush).
common_trees = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        alt.X('Genus:N', title='Genus'),
        alt.Y('count():Q', title='Count'),
        color='Postal Code',
    )
    .transform_filter(brush)
    .properties(title='Number of Trees by Genus', width=425)
)

# Notebook display: map stacked above the histogram, genus chart to the
# right. As a bare expression this has no effect when run as a script.
(trees & histogram) | common_trees
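# DRAFT CODE FOR A TREEMAP-STYLE (MOSAIC) CHART -- NOT WORKING YET.
# It still refers to `source` and the "Origin"/"Cylinders" fields, none of
# which are defined in this file.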
# base = (
# alt.Chart(source)
# .transform_aggregate(count_="count()", groupby=["Origin", "Cylinders"])
# .transform_stack(
# stack="count_",
# as_=["stack_count_Origin1", "stack_count_Origin2"],
# offset="normalize",
# sort=[alt.SortField("Origin", "ascending")],
# groupby=[],
# )
# .transform_window(
# x="min(stack_count_Origin1)",
# x2="max(stack_count_Origin2)",
# rank_Cylinders="dense_rank()",
# distinct_Cylinders="distinct(Cylinders)",
# groupby=["Origin"],
# frame=[None, None],
# sort=[alt.SortField("Cylinders", "ascending")],
# )
# .transform_window(
# rank_Origin="dense_rank()",
# frame=[None, None],
# sort=[alt.SortField("Origin", "ascending")],
# )
# .transform_stack(
# stack="count_",
# groupby=["Origin"],
# as_=["y", "y2"],
# offset="normalize",
# sort=[alt.SortField("Cylinders", "ascending")],
# )
# .transform_calculate(
# ny="datum.y + (datum.rank_Cylinders - 1) * datum.distinct_Cylinders * 0.01 / 3",
# ny2="datum.y2 + (datum.rank_Cylinders - 1) * datum.distinct_Cylinders * 0.01 / 3",
# nx="datum.x + (datum.rank_Origin - 1) * 0.01",
# nx2="datum.x2 + (datum.rank_Origin - 1) * 0.01",
# xc="(datum.nx+datum.nx2)/2",
# yc="(datum.ny+datum.ny2)/2",
# )
# )
# rect = base.mark_rect().encode(
# x=alt.X("nx:Q").axis(None),
# x2="nx2",
# y="ny:Q",
# y2="ny2",
# color=alt.Color("Origin:N").legend(None),
# opacity=alt.Opacity("Cylinders:Q").legend(None),
# tooltip=["Origin:N", "Cylinders:Q"],
# )
# text = base.mark_text(baseline="middle").encode(
# alt.X("xc:Q").axis(None),
# alt.Y("yc:Q").title("Cylinders"),
# text="Cylinders:N"
# )
# mosaic = rect + text
# origin_labels = base.mark_text(baseline="middle", align="center").encode(
# alt.X("min(xc):Q").title("Origin").axis(orient="top"),
# alt.Color("Origin").legend(None),
# text="Origin",
# )
# (
# (origin_labels & mosaic)
# .resolve_scale(x="shared")
# .configure_view(stroke="")
# .configure_concat(spacing=10)
# .configure_axis(domain=False, ticks=False, labels=False, grid=False)
# )
# Assemble the dashboard: the map stacked above the histogram, with the
# genus chart placed to the right, then write it out as a standalone page.
left_column = trees & histogram
dashboard = left_column | common_trees
chart = alt.vconcat(dashboard)
chart.save('viz.html')